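"""Fetch the construction-project environmental impact assessment (EIA) listings for Nan County."""
# NOTE: the listing pages on this site are not numbered contiguously, e.g.
# http://www.nanxian.gov.cn/14366/14516/14799/14662/14811/index_2.htm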


import codecs
import os
import re
from urllib.parse import urljoin

import cssselect
import requests
from bs4 import BeautifulSoup as bs
from lxml.html import etree


# https://www.anxiang.gov.cn/xxgk/zfxxgkml/jdjcqk/hjbh/jsxmhp


# Walk the paginated listing and, for every entry, print its date text, its
# sanitised title and its absolute URL.  The total number of links found is
# printed at the end.

n = 13    # number of listing pages to request: index_1.htm .. index_<n>.htm
m = 0     # running total of article links found across all pages

base_url = 'http://www.nanxian.gov.cn/14366/14516/14799/14662/14811/'
# Everything except ASCII letters, digits and characters in the
# U+4E00..U+9FA5 range is stripped from titles.  Compiled once, outside the loop.
title_junk = re.compile("[^a-zA-Z0-9\u4e00-\u9fa5]")

for page in range(1, n + 1):
    url = '%sindex_%s.htm' % (base_url, page)

    # A timeout keeps one unresponsive page from hanging the whole run.
    try:
        r = requests.get(url, timeout=30)
    except requests.RequestException as exc:
        print('skip %s: %s' % (url, exc))
        continue
    # Page numbering on the site has gaps, so a missing page is skipped
    # rather than treated as fatal.
    if not r.ok:
        print('skip %s: HTTP %s' % (url, r.status_code))
        continue
    r.encoding = 'gb2312'

    soup = bs(r.text, 'html.parser')
    container = soup.find("div", class_="tllb_rg_con")
    if container is None:
        # Error pages and redesigned pages lack the listing container.
        print('skip %s: listing container not found' % url)
        continue

    # recursive=False mirrors the original direct-children traversal:
    # div > ul > li > (span | a).
    for ul in container.find_all('ul', recursive=False):
        for li in ul.find_all('li', recursive=False):
            for child in li.find_all(['span', 'a'], recursive=False):
                if child.name == 'span':
                    # The <span> holds the entry's date text.
                    print(child.get_text())
                    continue

                # Remaining case is the <a>: the entry's title and link.
                href = child.get('href')
                if not href:
                    continue
                # get_text() is used because .string is None when the
                # anchor contains nested tags.
                name = title_junk.sub('', child.get_text())
                print(name)
                # urljoin resolves relative, root-relative and absolute hrefs
                # against the page URL.
                print(urljoin(url, href))
                m += 1
print(m)